#######################################################################
# LINEAR PROBABILITY MODELS OF RESCUE MODES ON RESCUER HOUSEHOLD TYPE #
#######################################################################
# Author: Kasia Nalewajko
# First created: 6 February 2023
# Replicated: 8 June 2024

rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

if (!require("dplyr")) install.packages("dplyr")
if (!require("tidyr")) install.packages("tidyr")
if (!require("stringr")) install.packages("stringr")
if (!require("lmtest")) install.packages("lmtest")
if (!require("sandwich")) install.packages("sandwich")
if (!require("ggplot2")) install.packages("ggplot2")

# LOAD DATA ---------------------------------------------------------------

load("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/RANfrance.Rda")

# RUN THE MODEL IN A LOOP ---------------------------------------------------------------

# Generate vector of outcomes
outcomes <- c("hiding",
              "finding_shelter",
              "forging_documents",
              "organising_evasion")

# Create empty containers
estimate_contact <- rep(NA, 3*length(outcomes))
estimate_resister <- rep(NA, 3*length(outcomes))
std_error_contact <- rep(NA, 3*length(outcomes))
std_error_resister <- rep(NA, 3*length(outcomes))
model <- rep(NA, 3*length(outcomes))
models_list <- list()
n <- length(outcomes)

# Loop over outcomes
for(i in 1:length(outcomes)){
  # Models with no controls
  models_list[[i]] <- lm(as.formula(paste(outcomes[i],"~ type")), data = frran_hh)
  robust_se <- coeftest(models_list[[i]], vcov = vcovHC(models_list[[i]], type = "HC1"))
  estimate_contact[[i]] <- robust_se[2, 1]
  estimate_resister[[i]] <- robust_se[3, 1]
  std_error_contact[[i]] <- robust_se[2, 2]
  std_error_resister[[i]] <- robust_se[3, 2]
  model[[i]] <- "Bivariate"
  
  # Models with province FE
  models_list[[i + n]] <- lm(as.formula(paste(outcomes[i],"~ type + as.factor(dept_code)")), data = frran_hh)
  robust_se <- coeftest(models_list[[i + n]], vcov = vcovHC(models_list[[i + n]], type = "HC1"))
  estimate_contact[[i + n]] <- robust_se[2, 1]
  estimate_resister[[i + n]] <- robust_se[3, 1]
  std_error_contact[[i + n]] <- robust_se[2, 2]
  std_error_resister[[i + n]] <- robust_se[3, 2]
  model[[i + n]] <- "Province FE"
  
  # Models with county FE
  models_list[[i + n + n]] <- lm(as.formula(paste(outcomes[i],"~ type + as.factor(deppct)")), data = frran_hh)
  robust_se <- coeftest(models_list[[i + n + n]], vcov = vcovHC(models_list[[i + n + n]], type = "HC1"))
  estimate_contact[[i + n + n]] <- robust_se[2, 1]
  estimate_resister[[i + n + n]] <- robust_se[3, 1]
  std_error_contact[[i + n + n]] <- robust_se[2, 2]
  std_error_resister[[i + n + n]] <- robust_se[3, 2]
  model[[i + n + n]] <- "County FE"
  
  print(i)
}

# ASSEMBLE AND CLEAN DATA FRAMES FOR PLOTTING -----

models_df <- data.frame(
  model = model,
  outcome = rep(outcomes, times = 3),
  estimate_contact = estimate_contact,
  std_error_contact = std_error_contact,
  estimate_resister = estimate_resister,
  std_error_resister = std_error_resister,
  ci_hi_95_contact = estimate_contact + 1.96 * std_error_contact,
  ci_lo_95_contact = estimate_contact - 1.96 * std_error_contact,
  ci_hi_90_contact = estimate_contact + 1.645 * std_error_contact,
  ci_lo_90_contact = estimate_contact - 1.645 * std_error_contact,
  ci_hi_95_resister = estimate_resister + 1.96 * std_error_resister,
  ci_lo_95_resister = estimate_resister - 1.96 * std_error_resister,
  ci_hi_90_resister = estimate_resister + 1.645 * std_error_resister,
  ci_lo_90_resister = estimate_resister - 1.645 * std_error_resister
)

models_df <- models_df %>%
  gather(key, value, -model, -outcome) %>%
  tidyr::extract(col = key, into = c("statistic", "hh_type"), regex = "([\\w*\\d*_]*)_([a-z]*)$") %>%
  spread(statistic, value)

models_df$hh_type <- stringr::str_replace_all(models_df$hh_type, pattern = "^contact$", replacement = "Rescuer in contact with insurgents")
models_df$hh_type <- stringr::str_replace_all(models_df$hh_type, pattern = "^resister$", replacement = "Rescuer-insurgent")

models_df$outcome <- str_replace_all(models_df$outcome, pattern = "_", replacement = " ")


# PLOT ---------------------------------------------------------------

models_df %>%
  ggplot(aes(x = reorder(outcome, -estimate), 
             y = estimate, 
             color = factor(model, levels = c("Bivariate", "Province FE", "County FE")), 
             shape = factor(model, levels = c("Bivariate", "Province FE", "County FE"))
  )) +
  geom_hline(aes(yintercept = 0), linetype = 2, color = "gray20") +
  # coord_flip(ylim = c(-1.5, 1.5)) +
  geom_linerange(aes(ymin = ci_lo_95, ymax = ci_hi_95), 
                 position = position_dodge(width = 0.1),
                 size = 0.5) +
  geom_linerange(aes(ymin = ci_lo_90, ymax = ci_hi_90), 
                 position = position_dodge(width = 0.1),
                 size = 1) +
  geom_point(size = 1.75
             , position = position_dodge(width = 0.1)
  ) +
  facet_wrap(~ hh_type) +
  theme_bw() +
  theme(axis.text=element_text(size=8),
        legend.title = element_blank(),
        legend.key = element_rect(fill = NA, color = NA), 
        legend.background = element_rect(fill = NA, color = NA),
        panel.background = element_rect(fill = NA, color = "black")
  ) +
  labs(
    x = "Outcomes",
    y = "Estimates"
  ) +
  scale_color_viridis_d(option = "inferno", begin = 0.2, end = 0.85) +
  coord_flip()

# SAVE ---------------------------------------------------------------

ggsave(
  "4_LPM_mechs_France.png",
  plot = last_plot(),
  path = "./00 SUBMITTED/00 APSR final/01 main_text/figures",
  width = 20,
  height = 10,
  units = "cm",
  dpi = 300
)





